# File:	    9_FigureA7-A10a_spec1.R
# Project:	Colonial legacy and foreign aid: Decomposing the colonial bias
# Authors:	Daina Chiba, Department of Government, University of Essex
#           Tobias Heinrich, Department of Political Science, University of South Carolina
# Contact:	dchiba@essex.ac.uk
# Date:     19 November, 2018

# Description: 
# This R script file replicates the results reported in Figures A.7-A.10
# in Appendix H and I.

# Preparation -------------------------------------------------------------
rm(list=ls())
library(foreign)
library(MCMCglmm)

# Define auxiliary functions
source("aux_functions.R")


# Data manipulation -------------------------------------------------------
fa.data <- read.dta("colony-dummy.dta")

## log(0)
fa.data $ tobitDV <- fa.data $ lnGrossAid - log(.001)
fa.data $ tobitDV[fa.data $ AnyGrossAid == 0] <- 0

## UK only
fa.data <- fa.data[fa.data $ ccode == 200, ]

## Listwise delete missing values
fa.data.nna <- fa.data[, c("tobitDV", "RA", "RB", "WBdum2", "WBdum3", "WBdum4","WBdum5", "WB",
                           "colony", "ccode", "rCCODE",
                           "lnDIS", "ColdWar", "lnPOPB", "dyad", "year", "rCNAME","dCNAME")]
fa.data.nna <- na.omit(fa.data.nna)
fa.data.nna $ RB2 <- fa.data.nna $ RB^2

## time variables
fa.data.nna $ yearid <- fa.data.nna $ year - 1959
fa.data.nna $ yearid2 <- fa.data.nna $ yearid^2
fa.data.nna $ yearid3 <- fa.data.nna $ yearid^3
fa.data.nna $ time <- (fa.data.nna $ year - mean(fa.data.nna $ year))/sd(fa.data.nna $ year)
fa.data.nna $ time2 <- (fa.data.nna $ time)^2
fa.data.nna $ time3 <- (fa.data.nna $ time)^3

## donor
length(unique(fa.data.nna $ dCNAME))
fa.data.nna $ donor <- as.numeric(as.factor(fa.data.nna $ dCNAME))

## recipient
length(unique(fa.data.nna $ rCNAME))
fa.data.nna $ recip <- as.numeric(as.factor(fa.data.nna $ rCNAME))

## Censoring
fa.data.nna $ YLow <- ifelse(fa.data.nna $ tobitDV == 0, -Inf, fa.data.nna $ tobitDV)

fa.data.nna.1 <- fa.data.nna[fa.data.nna $ colony == 1, ]
fa.data.nna.0 <- fa.data.nna[fa.data.nna $ colony == 0, ]

nrow(fa.data.nna)
nrow(fa.data.nna.1)
nrow(fa.data.nna.0)

mean(fa.data.nna.1 $ tobitDV)
mean(fa.data.nna.0 $ tobitDV)



# Tobit models ------------------------------------------------------------

## Pooled

set.seed(123456789)
spec1.p <- MCMCglmm(
  fixed = cbind(YLow, tobitDV) ~ RA + RB + RB2 + WBdum2 + WBdum3 + WBdum4 + WBdum5 + 
    ColdWar + lnDIS + lnPOPB + time + time2 + time3 + 
    colony,
  data = fa.data.nna,
  nitt = 11000,
  burnin = 1000,
  prior=list(R = list(V=1, nu=.001)),
  family="cengaussian",
  thin = 10)

summary(spec1.p)


## Colony == 1

set.seed(123456789)
spec1.1 <- MCMCglmm(
  fixed = cbind(YLow, tobitDV) ~ RA + RB + RB2 + WBdum2 + WBdum3 + WBdum4 + WBdum5 + 
    ColdWar + lnDIS + lnPOPB + time + time2 + time3,
  data = fa.data.nna.1,
  nitt = 11000,
  burnin = 1000,
  prior=list(R = list(V=1, nu=.001)),
  family="cengaussian",
  thin = 10)

## Colony == 0

set.seed(123456789)
spec1.0 <- MCMCglmm(
  fixed = cbind(YLow, tobitDV) ~ RA + RB + RB2 + WBdum2 + WBdum3 + WBdum4 + WBdum5 + 
    ColdWar + lnDIS + lnPOPB + time + time2 + time3,
  data = fa.data.nna.0,
  nitt = 11000,
  burnin = 1000,
  prior=list(R = list(V=1, nu=.001)),
  family="cengaussian",
  thin = 10)

summary(spec1.p)
summary(spec1.1)
summary(spec1.0)

# Decomposition analyses --------------------------------------------------

# Calculate aggregate observables

calculate.qoi.uk <- function(data, group.var, fit.0, fit.1){

  ## Extract beta, v-cov matrix, and variables
  
  coef.0 <- summary(fit.0 $ Sol)[[2]][,3]
  coef.1 <- summary(fit.1 $ Sol)[[2]][,3]
  data.x <- data[, names(coef.0)[-1]]
  x <- cbind(1, as.matrix(data.x))
  x.0 <- x[data[group.var]==0, ]
  x.1 <- x[data[group.var]==1, ]
  sigma.1 <- sqrt(median(fit.1 $ VCV[,1]))
  sigma.0 <- sqrt(median(fit.0 $ VCV[,1]))  
  
  ## QoI 1: Pr[ Y > 0 ] = Phi (XB / sigma)
  
  xb.x0.b0 <- x.0 %*% coef.0
  xb.x0.b1 <- x.0 %*% coef.1
  xb.x1.b0 <- x.1 %*% coef.0
  xb.x1.b1 <- x.1 %*% coef.1
  
  pry.x0.b0 <- mean( pnorm( xb.x0.b0 / sigma.0 ) )
  pry.x0.b1 <- mean( pnorm( xb.x0.b1 / sigma.1 ) )
  pry.x1.b0 <- mean( pnorm( xb.x1.b0 / sigma.0 ) )
  pry.x1.b1 <- mean( pnorm( xb.x1.b1 / sigma.1 ) )
  
  ## QoI 2: E[ Y | Y > 0 ] = XB + sigma * inv.mil, where inv.mil = dnorm(xb/sibma) / pnorm(xb/sigma)
  
  inv.mil.x0.b0 <- dnorm( xb.x0.b0 / sigma.0) / pnorm( xb.x0.b0 / sigma.0)
  inv.mil.x0.b1 <- dnorm( xb.x0.b1 / sigma.1) / pnorm( xb.x0.b1 / sigma.1)
  inv.mil.x1.b0 <- dnorm( xb.x1.b0 / sigma.0) / pnorm( xb.x1.b0 / sigma.0)
  inv.mil.x1.b1 <- dnorm( xb.x1.b1 / sigma.1) / pnorm( xb.x1.b1 / sigma.1)
  
  cy.x0.b0 <- mean( xb.x0.b0 + sigma.0 * inv.mil.x0.b0 )
  cy.x0.b1 <- mean( xb.x0.b1 + sigma.1 * inv.mil.x0.b1 )
  cy.x1.b0 <- mean( xb.x1.b0 + sigma.0 * inv.mil.x1.b0 )
  cy.x1.b1 <- mean( xb.x1.b1 + sigma.1 * inv.mil.x1.b1 )
  
  ## QoI 3: E[ Y ]
  
  ey.x0.b0 <- mean( pnorm( xb.x0.b0 / sigma.0 ) * (xb.x0.b0 + sigma.0 * inv.mil.x0.b0) )
  ey.x0.b1 <- mean( pnorm( xb.x0.b1 / sigma.1 ) * (xb.x0.b1 + sigma.1 * inv.mil.x0.b1) )
  ey.x1.b0 <- mean( pnorm( xb.x1.b0 / sigma.0 ) * (xb.x1.b0 + sigma.0 * inv.mil.x1.b0) )
  ey.x1.b1 <- mean( pnorm( xb.x1.b1 / sigma.1 ) * (xb.x1.b1 + sigma.1 * inv.mil.x1.b1) )
  
  out <- data.frame( pry.x0.b0 = pry.x0.b0, pry.x0.b1 = pry.x0.b1, 
                     pry.x1.b0 = pry.x1.b0, pry.x1.b1 = pry.x1.b1, 
                     cy.x0.b0 = cy.x0.b0, cy.x0.b1 = cy.x0.b1, 
                     cy.x1.b0 = cy.x1.b0, cy.x1.b1 = cy.x1.b1,
                     ey.x0.b0 = ey.x0.b0, ey.x0.b1 = ey.x0.b1, 
                     ey.x1.b0 = ey.x1.b0, ey.x1.b1 = ey.x1.b1)
  return(out)
}

calculate.qoi.full.uk <- function(data, group.var, fit.0, fit.1, beta.0, beta.1){

  ## Extract beta, v-cov matrix, and variables
  
  coef.0 <- summary(fit.0 $ Sol)[[2]][,3]
  coef.1 <- summary(fit.1 $ Sol)[[2]][,3]
  data.x <- data[, names(coef.0)[-1]]
  x <- cbind(1, as.matrix(data.x))
  x.0 <- x[data[group.var]==0, ]
  x.1 <- x[data[group.var]==1, ]
  sigma.1 <- sqrt(median(fit.1 $ VCV[,1]))
  sigma.0 <- sqrt(median(fit.0 $ VCV[,1]))  
  
  ## QoI 1: Pr[ Y > 0 ] = Phi (XB / sigma)
  
  xb.x0.b0 <- x.0 %*% beta.0
  xb.x0.b1 <- x.0 %*% beta.1
  xb.x1.b0 <- x.1 %*% beta.0
  xb.x1.b1 <- x.1 %*% beta.1
  
  pry.x0.b0 <- mean( pnorm( xb.x0.b0 / sigma.0 ) )
  pry.x0.b1 <- mean( pnorm( xb.x0.b1 / sigma.1 ) )
  pry.x1.b0 <- mean( pnorm( xb.x1.b0 / sigma.0 ) )
  pry.x1.b1 <- mean( pnorm( xb.x1.b1 / sigma.1 ) )
  
  ## QoI 2: E[ Y | Y > 0 ] = XB + sigma * inv.mil, where inv.mil = dnorm(xb/sibma) / pnorm(xb/sigma)
  
  inv.mil.x0.b0 <- dnorm( xb.x0.b0 / sigma.0) / pnorm( xb.x0.b0 / sigma.0)
  inv.mil.x0.b1 <- dnorm( xb.x0.b1 / sigma.1) / pnorm( xb.x0.b1 / sigma.1)
  inv.mil.x1.b0 <- dnorm( xb.x1.b0 / sigma.0) / pnorm( xb.x1.b0 / sigma.0)
  inv.mil.x1.b1 <- dnorm( xb.x1.b1 / sigma.1) / pnorm( xb.x1.b1 / sigma.1)
  
  cy.x0.b0 <- mean( xb.x0.b0 + sigma.0 * inv.mil.x0.b0 )
  cy.x0.b1 <- mean( xb.x0.b1 + sigma.1 * inv.mil.x0.b1 )
  cy.x1.b0 <- mean( xb.x1.b0 + sigma.0 * inv.mil.x1.b0 )
  cy.x1.b1 <- mean( xb.x1.b1 + sigma.1 * inv.mil.x1.b1 )
  
  ## QoI 3: E[ Y ]
  
  ey.x0.b0 <- mean( pnorm( xb.x0.b0 / sigma.0 ) * (xb.x0.b0 + sigma.0 * inv.mil.x0.b0) )
  ey.x0.b1 <- mean( pnorm( xb.x0.b1 / sigma.1 ) * (xb.x0.b1 + sigma.1 * inv.mil.x0.b1) )
  ey.x1.b0 <- mean( pnorm( xb.x1.b0 / sigma.0 ) * (xb.x1.b0 + sigma.0 * inv.mil.x1.b0) )
  ey.x1.b1 <- mean( pnorm( xb.x1.b1 / sigma.1 ) * (xb.x1.b1 + sigma.1 * inv.mil.x1.b1) )
  
  out <- data.frame( pry.x0.b0 = pry.x0.b0, pry.x0.b1 = pry.x0.b1, 
                     pry.x1.b0 = pry.x1.b0, pry.x1.b1 = pry.x1.b1, 
                     cy.x0.b0 = cy.x0.b0, cy.x0.b1 = cy.x0.b1, 
                     cy.x1.b0 = cy.x1.b0, cy.x1.b1 = cy.x1.b1,
                     ey.x0.b0 = ey.x0.b0, ey.x0.b1 = ey.x0.b1, 
                     ey.x1.b0 = ey.x1.b0, ey.x1.b1 = ey.x1.b1)
  return(out)
}


(out <- calculate.qoi.uk(fa.data.nna, group.var = "colony", fit.0 = spec1.0, fit.1 = spec1.1))

## decomposition based on E(Y)
(total.diff.3 <- out $ ey.x1.b1 - out $ ey.x0.b0)
(obs.contrib.3 <- out $ ey.x1.b0 - out $ ey.x0.b0)
obs.contrib.3 / total.diff.3

## decomposition based on pr(y>0)
(total.diff.1 <- out $ pry.x1.b1 - out $ pry.x0.b0)
(obs.contrib.1 <- out $ pry.x1.b0 - out $ pry.x0.b0)
obs.contrib.1 / total.diff.1

## decomposition based on E(Y | Y > 0)
(total.diff.2 <- out $ cy.x1.b1 - out $ cy.x0.b0)
(obs.contrib.2 <- out $ cy.x1.b0 - out $ cy.x0.b0)
obs.contrib.2 / total.diff.2



# Posterior distribution of these QoIs ------------------------------------
nsim <- nrow(spec1.0[[1]])
pb <- txtProgressBar()

# combine results from 3 chains
beta.3c.1 <- spec1.1[[1]]
beta.3c.0 <- spec1.0[[1]]
qoi.list <- c()

for ( i in 1:nsim) {
  beta.0 <- beta.3c.0[ i, ]
  beta.1 <- beta.3c.1[ i, ]  
  qoi.list <- rbind(qoi.list, 
                    as.data.frame(
                      calculate.qoi.full.uk(fa.data.nna, 
                                         group.var = "colony", fit.0 = spec1.0, fit.1 = spec1.1, 
                                         beta.0 = beta.0, beta.1 = beta.1)))
  setTxtProgressBar(pb,i/nsim)
}

# Plot QOIs ---------------------------------------------------------------

# E(Y)
obs.diff <- qoi.list $ ey.x1.b0 - qoi.list $ ey.x0.b0
tot.diff <- qoi.list $ ey.x1.b1 - qoi.list $ ey.x0.b0
ey.obs.pct <- 100 * obs.diff/tot.diff

# Pr(Y>0)
obs.diff <- qoi.list $ pry.x1.b0 - qoi.list $ pry.x0.b0
tot.diff <- qoi.list $ pry.x1.b1 - qoi.list $ pry.x0.b0
pry.obs.pct <- 100 * obs.diff/tot.diff

# E (y | y > 0)
obs.diff <- qoi.list $ cy.x1.b0 - qoi.list $ cy.x0.b0
tot.diff <- qoi.list $ cy.x1.b1 - qoi.list $ cy.x0.b0
cy.obs.pct <- 100 * obs.diff/tot.diff

quantile(ey.obs.pct, probs = c(.05, .5, .95))
quantile(pry.obs.pct, probs = c(.05, .5, .95))
quantile(cy.obs.pct, probs = c(.05, .5, .95))

# > quantile(ey.obs.pct, probs = c(.05, .5, .95))
#         5%        50%        95% 
# -0.4862891  1.9790711  4.3827346 
# > quantile(pry.obs.pct, probs = c(.05, .5, .95))
#        5%       50%       95% 
#  3.970679 10.021495 15.248251 
# > quantile(cy.obs.pct, probs = c(.05, .5, .95))
#         5%        50%        95% 
# -0.4273172  1.7565548  3.9678066 

pdf(file="output/figures/FigureA9_BritsOnly.pdf", width=5.5, height=4)
par(mar=c(2,6,2,3))
plot(0,0, type="n", xlim = c(0, 100), ylim = c(0.75,9.5), axes=F,ann=F)
### shades
shadeColor <- c("gray80", "gray92")
## E(Y )
polygon(y = c(7,7,9,9), x = c(0, 
                              quantile(ey.obs.pct, probs = c(.5)), 
                              quantile(ey.obs.pct, probs = c(.5)), 
                              0), col= shadeColor[1], border=F)
polygon(y = c(7,7,9,9), x = c(quantile(ey.obs.pct, probs = c(.5)), 
                              100, 100, 
                              quantile(ey.obs.pct, probs = c(.5))), 
        col= shadeColor[2], border=F)
lines(x = quantile(ey.obs.pct, probs = c(.05, .95)), y = c(8,8))
lines(x = quantile(ey.obs.pct, probs = c(.05, .05)), y = c(7.75, 8.25))
lines(x = quantile(ey.obs.pct, probs = c(.95, .95)), y = c(7.75, 8.25))
points(x = quantile(ey.obs.pct, probs = .5), y = 8, pch = 19)
## Pr(Y>0)
polygon(y = c(4,4,6,6), x = c(0, quantile(pry.obs.pct, probs = .5), quantile(pry.obs.pct, probs = .5)
                              , 0), col= shadeColor[1], border=F)
polygon(y = c(4,4,6,6), x = c(quantile(pry.obs.pct, probs = .5), 
                              100, 100, quantile(pry.obs.pct, probs = .5)), col= shadeColor[2], border=F)
lines(x = quantile(pry.obs.pct, probs = c(.05, .95)), y = c(5,5))
lines(x = quantile(pry.obs.pct, probs = c(.05, .05)), y = c(4.75, 5.25))
lines(x = quantile(pry.obs.pct, probs = c(.95, .95)), y = c(4.75, 5.25))
points(x = quantile(pry.obs.pct, probs = .5), y = 5, pch = 19)
## E(Y | Y>0)
polygon(y = c(1,1,3,3), x = c(0, quantile(cy.obs.pct, probs = .5),
                              quantile(cy.obs.pct, probs = .5), 0), col= shadeColor[1], border=F)
polygon(y = c(1,1,3,3), x = c(quantile(cy.obs.pct, probs = .5), 
                              100, 100, quantile(cy.obs.pct, probs = .5)), col= shadeColor[2], border=F)
lines(x = quantile(cy.obs.pct, probs = c(.05, .95)), y = c(2,2))
lines(x = quantile(cy.obs.pct, probs = c(.05, .05)), y = c(1.75, 2.25))
lines(x = quantile(cy.obs.pct, probs = c(.95, .95)), y = c(1.75, 2.25))
points(x = quantile(cy.obs.pct, probs = .5), y = 2, pch = 19)

xlabd <- c("0 %", "25 %","50 %","75 %","100 %")
axis(1, cex.axis=1, mgp=c(3,.4,0), labels=xlabd,at=c(0, 25, 50, 75, 100))
axis(2, labels = c("E(Y | Y > 0)", "Pr(Y > 0)", "E(Y)"), at=c(2, 5, 8), las = 1, tick = F)
text(9.5, 9.5, "Observables")
text(50, 9.5, "Saliency")
dev.off()



# France only -------------------------------------------------------------

fa.data <- read.dta("colony-dummy.dta")

## log(0)
fa.data $ tobitDV <- fa.data $ lnGrossAid - log(.001)
fa.data $ tobitDV[fa.data $ AnyGrossAid == 0] <- 0

## France only
fa.data <- fa.data[fa.data $ ccode == 220, ]

## Listwise delete missing values
fa.data.nna <- fa.data[, c("tobitDV", "RA", "RB", "WBdum2", "WBdum3", "WBdum4","WBdum5", "WB",
                           "colony", "ccode", "rCCODE",
                           "lnDIS", "ColdWar", "lnPOPB", "dyad", "year", "rCNAME","dCNAME")]
fa.data.nna <- na.omit(fa.data.nna)
fa.data.nna $ RB2 <- fa.data.nna $ RB^2

## time variables
fa.data.nna $ yearid <- fa.data.nna $ year - 1959
fa.data.nna $ yearid2 <- fa.data.nna $ yearid^2
fa.data.nna $ yearid3 <- fa.data.nna $ yearid^3
fa.data.nna $ time <- (fa.data.nna $ year - mean(fa.data.nna $ year))/sd(fa.data.nna $ year)
fa.data.nna $ time2 <- (fa.data.nna $ time)^2
fa.data.nna $ time3 <- (fa.data.nna $ time)^3

## donor
length(unique(fa.data.nna $ dCNAME))
fa.data.nna $ donor <- as.numeric(as.factor(fa.data.nna $ dCNAME))

## recipient
length(unique(fa.data.nna $ rCNAME))
fa.data.nna $ recip <- as.numeric(as.factor(fa.data.nna $ rCNAME))

## Censoring
fa.data.nna $ YLow <- ifelse(fa.data.nna $ tobitDV == 0, -Inf, fa.data.nna $ tobitDV)

fa.data.nna.1 <- fa.data.nna[fa.data.nna $ colony == 1, ]
fa.data.nna.0 <- fa.data.nna[fa.data.nna $ colony == 0, ]

nrow(fa.data.nna)
nrow(fa.data.nna.1)
nrow(fa.data.nna.0)

mean(fa.data.nna.1 $ tobitDV)
mean(fa.data.nna.0 $ tobitDV)



# Theoretical variables only ----------------------------------------------

## Pooled

set.seed(123456789)
spec1.p <- MCMCglmm(
  fixed = cbind(YLow, tobitDV) ~ RA + RB + RB2 + WB + 
    ColdWar + lnDIS + lnPOPB + time + time2 + time3 + 
    colony,
  data = fa.data.nna,
  nitt = 11000,
  burnin = 1000,
  prior=list(R = list(V=1, nu=.001)),
  family="cengaussian",
  thin = 10)

summary(spec1.p)


## Colony == 1
set.seed(123456789)
spec1.1 <- MCMCglmm(
  fixed = cbind(YLow, tobitDV) ~ RA + RB + RB2 + WB + 
    ColdWar + lnDIS + lnPOPB + time + time2 + time3,
  data = fa.data.nna.1,
  nitt = 11000,
  burnin = 1000,
  prior=list(R = list(V=1, nu=.001)),
  family="cengaussian",
  thin = 10)

## Colony == 0
set.seed(123456789)
spec1.0 <- MCMCglmm(
  fixed = cbind(YLow, tobitDV) ~ RA + RB + RB2 + WB + 
    ColdWar + lnDIS + lnPOPB + time + time2 + time3,
  data = fa.data.nna.0,
  nitt = 11000,
  burnin = 1000,
  prior=list(R = list(V=1, nu=.001)),
  family="cengaussian",
  thin = 10)

summary(spec1.p)
summary(spec1.1)
summary(spec1.0)

# Decomposition analyses --------------------------------------------------

(out <- calculate.qoi.uk(fa.data.nna, group.var = "colony", fit.0 = spec1.0, fit.1 = spec1.1))

## decomposition based on E(Y)
(total.diff.3 <- out $ ey.x1.b1 - out $ ey.x0.b0)
(obs.contrib.3 <- out $ ey.x1.b0 - out $ ey.x0.b0)
obs.contrib.3 / total.diff.3

## decomposition based on pr(y>0)
(total.diff.1 <- out $ pry.x1.b1 - out $ pry.x0.b0)
(obs.contrib.1 <- out $ pry.x1.b0 - out $ pry.x0.b0)
obs.contrib.1 / total.diff.1

## decomposition based on E(Y | Y > 0)
(total.diff.2 <- out $ cy.x1.b1 - out $ cy.x0.b0)
(obs.contrib.2 <- out $ cy.x1.b0 - out $ cy.x0.b0)
obs.contrib.2 / total.diff.2



# Posterior distribution of these QoIs ------------------------------------
nsim <- nrow(spec1.0[[1]])
pb <- txtProgressBar()

# combine results from 3 chains
beta.3c.1 <- spec1.1[[1]]
beta.3c.0 <- spec1.0[[1]]
qoi.list <- c()

for ( i in 1:nsim) {
  beta.0 <- beta.3c.0[ i, ]
  beta.1 <- beta.3c.1[ i, ]  
  qoi.list <- rbind(qoi.list, 
                    as.data.frame(
                      calculate.qoi.full.uk(fa.data.nna, 
                                         group.var = "colony", fit.0 = spec1.0, fit.1 = spec1.1, 
                                         beta.0 = beta.0, beta.1 = beta.1)))
  setTxtProgressBar(pb,i/nsim)
}

# Plot QOIs ---------------------------------------------------------------

# E(Y)
obs.diff <- qoi.list $ ey.x1.b0 - qoi.list $ ey.x0.b0
tot.diff <- qoi.list $ ey.x1.b1 - qoi.list $ ey.x0.b0
ey.obs.pct <- 100 * obs.diff/tot.diff

# Pr(Y>0)
obs.diff <- qoi.list $ pry.x1.b0 - qoi.list $ pry.x0.b0
tot.diff <- qoi.list $ pry.x1.b1 - qoi.list $ pry.x0.b0
pry.obs.pct <- 100 * obs.diff/tot.diff

# E (y | y > 0)
obs.diff <- qoi.list $ cy.x1.b0 - qoi.list $ cy.x0.b0
tot.diff <- qoi.list $ cy.x1.b1 - qoi.list $ cy.x0.b0
cy.obs.pct <- 100 * obs.diff/tot.diff

quantile(ey.obs.pct, probs = c(.05, .5, .95))
quantile(pry.obs.pct, probs = c(.05, .5, .95))
quantile(cy.obs.pct, probs = c(.05, .5, .95))


pdf(file="output/figures/FigureA10_FranceOnly.pdf", width=5.5, height=4)
par(mar=c(2,6,2,3))
plot(0,0, type="n", xlim = c(0, 100), ylim = c(0.75,9.5), axes=F,ann=F)
### shades
shadeColor <- c("gray80", "gray92")
## E(Y )
polygon(y = c(7,7,9,9), x = c(0, 
                              quantile(ey.obs.pct, probs = c(.5)), 
                              quantile(ey.obs.pct, probs = c(.5)), 
                              0), col= shadeColor[1], border=F)
polygon(y = c(7,7,9,9), x = c(quantile(ey.obs.pct, probs = c(.5)), 
                              100, 100, 
                              quantile(ey.obs.pct, probs = c(.5))), 
        col= shadeColor[2], border=F)
lines(x = quantile(ey.obs.pct, probs = c(.05, .95)), y = c(8,8))
lines(x = quantile(ey.obs.pct, probs = c(.05, .05)), y = c(7.75, 8.25))
lines(x = quantile(ey.obs.pct, probs = c(.95, .95)), y = c(7.75, 8.25))
points(x = quantile(ey.obs.pct, probs = .5), y = 8, pch = 19)
## Pr(Y>0)
polygon(y = c(4,4,6,6), x = c(0, quantile(pry.obs.pct, probs = .5), quantile(pry.obs.pct, probs = .5)
                              , 0), col= shadeColor[1], border=F)
polygon(y = c(4,4,6,6), x = c(quantile(pry.obs.pct, probs = .5), 
                              100, 100, quantile(pry.obs.pct, probs = .5)), col= shadeColor[2], border=F)
lines(x = quantile(pry.obs.pct, probs = c(.05, .95)), y = c(5,5))
lines(x = quantile(pry.obs.pct, probs = c(.05, .05)), y = c(4.75, 5.25))
lines(x = quantile(pry.obs.pct, probs = c(.95, .95)), y = c(4.75, 5.25))
points(x = quantile(pry.obs.pct, probs = .5), y = 5, pch = 19)
## E(Y | Y>0)
polygon(y = c(1,1,3,3), x = c(0, quantile(cy.obs.pct, probs = .5),
                              quantile(cy.obs.pct, probs = .5), 0), col= shadeColor[1], border=F)
polygon(y = c(1,1,3,3), x = c(quantile(cy.obs.pct, probs = .5), 
                              100, 100, quantile(cy.obs.pct, probs = .5)), col= shadeColor[2], border=F)
lines(x = quantile(cy.obs.pct, probs = c(.05, .95)), y = c(2,2))
lines(x = quantile(cy.obs.pct, probs = c(.05, .05)), y = c(1.75, 2.25))
lines(x = quantile(cy.obs.pct, probs = c(.95, .95)), y = c(1.75, 2.25))
points(x = quantile(cy.obs.pct, probs = .5), y = 2, pch = 19)

xlabd <- c("0 %", "25 %","50 %","75 %","100 %")
axis(1, cex.axis=1, mgp=c(3,.4,0), labels=xlabd,at=c(0, 25, 50, 75, 100))
axis(2, labels = c("E(Y | Y > 0)", "Pr(Y > 0)", "E(Y)"), at=c(2, 5, 8), las = 1, tick = F)
text(9.5, 9.5, "Observables")
text(50, 9.5, "Saliency")
dev.off()



# Cold War period ---------------------------------------------------------

fa.data <- read.dta("colony-dummy.dta")

## log(0)
fa.data $ tobitDV <- fa.data $ lnGrossAid - log(.001)
fa.data $ tobitDV[fa.data $ AnyGrossAid == 0] <- 0

fa.data.cw1 <- fa.data[fa.data $ ColdWar == 1, ]
fa.data.cw0 <- fa.data[fa.data $ ColdWar == 0, ]

## Listwise delete missing values
fa.data.nna <- fa.data.cw1[, c("tobitDV", "RA", "RB", "WBdum2", "WBdum3", "WBdum4","WBdum5", "WB",
                           "colony", "ccode", "rCCODE",
                           "lnDIS", "ColdWar", "lnPOPB", "dyad", "year", "rCNAME","dCNAME")]
fa.data.nna <- na.omit(fa.data.nna)
fa.data.nna $ RB2 <- fa.data.nna $ RB^2

## time variables
fa.data.nna $ time <- (fa.data.nna $ year - mean(fa.data.nna $ year))/sd(fa.data.nna $ year)
fa.data.nna $ time2 <- (fa.data.nna $ time)^2
fa.data.nna $ time3 <- (fa.data.nna $ time)^3

## donor
length(unique(fa.data.nna $ dCNAME))
fa.data.nna $ donor <- as.numeric(as.factor(fa.data.nna $ dCNAME))

## recipient
length(unique(fa.data.nna $ rCNAME))
fa.data.nna $ recip <- as.numeric(as.factor(fa.data.nna $ rCNAME))

## Censoring
fa.data.nna $ YLow <- ifelse(fa.data.nna $ tobitDV == 0, -Inf, fa.data.nna $ tobitDV)

fa.data.nna.1 <- fa.data.nna[fa.data.nna $ colony == 1, ]
fa.data.nna.0 <- fa.data.nna[fa.data.nna $ colony == 0, ]

nrow(fa.data.nna)
nrow(fa.data.nna.1)
nrow(fa.data.nna.0)

mean(fa.data.nna.1 $ tobitDV)
mean(fa.data.nna.0 $ tobitDV)



# Tobit models ------------------------------------------------------------

## Pooled

set.seed(123456789)
spec1.p <- MCMCglmm(
  fixed = cbind(YLow, tobitDV) ~ RA + RB + RB2 + WBdum2 + WBdum3 + WBdum4 + WBdum5 + 
    lnDIS + lnPOPB + time + time2 + time3 + 
    colony,
  random = ~us(1):dCNAME + us(1):rCNAME, 
  data = fa.data.nna,
  nitt = 2000,
  burnin = 1000,
  prior=list(R = list(V=1, nu=.001), G = list(G1=list(V=1, nu=.001), G2 = list(V=1, nu=.001))),
  family="cengaussian",
  thin = 10)

summary(spec1.p)


## Colony == 1 (30 sec)
set.seed(123456789)
spec1.1 <- MCMCglmm(
  fixed = cbind(YLow, tobitDV) ~ RA + RB + RB2 + WBdum2 + WBdum3 + WBdum4 + WBdum5 + 
    lnDIS + lnPOPB + time + time2 + time3,
  random = ~us(1):dCNAME + us(1):rCNAME, 
  data = fa.data.nna.1,
  nitt = 11000,
  burnin = 1000,
  prior=list(R = list(V=1, nu=.001), G = list(G1=list(V=1, nu=.001), G2 = list(V=1, nu=.001))),
  family="cengaussian",
  thin = 10)

## Colony == 0 (14 min)
set.seed(123456789)
spec1.0 <- MCMCglmm(
   fixed = cbind(YLow, tobitDV) ~ RA + RB + RB2 + WBdum2 + WBdum3 + WBdum4 + WBdum5 + 
     lnDIS + lnPOPB + time + time2 + time3,
   random = ~us(1):dCNAME + us(1):rCNAME, 
   data = fa.data.nna.0,
   nitt = 11000,
   burnin = 1000,
   prior=list(R = list(V=1, nu=.001), G = list(G1=list(V=1, nu=.001), G2 = list(V=1, nu=.001))),
   family="cengaussian",
   thin = 10)

summary(spec1.p)
summary(spec1.1)
summary(spec1.0)

# Decomposition analyses --------------------------------------------------

# Calculate aggregate observables

(out <- calculate.qoi(fa.data.nna, group.var = "colony", fit.0 = spec1.0, fit.1 = spec1.1))

## decomposition based on E(Y)
(total.diff.3 <- out $ ey.x1.b1 - out $ ey.x0.b0)
(obs.contrib.3 <- out $ ey.x1.b0 - out $ ey.x0.b0)
obs.contrib.3 / total.diff.3

## decomposition based on pr(y>0)
(total.diff.1 <- out $ pry.x1.b1 - out $ pry.x0.b0)
(obs.contrib.1 <- out $ pry.x1.b0 - out $ pry.x0.b0)
obs.contrib.1 / total.diff.1

## decomposition based on E(Y | Y > 0)
(total.diff.2 <- out $ cy.x1.b1 - out $ cy.x0.b0)
(obs.contrib.2 <- out $ cy.x1.b0 - out $ cy.x0.b0)
obs.contrib.2 / total.diff.2



# Posterior distribution of these QoIs ------------------------------------
nsim <- nrow(spec1.0[[1]])
pb <- txtProgressBar()

# combine results from 3 chains
beta.3c.1 <- spec1.1[[1]]
beta.3c.0 <- spec1.0[[1]]
qoi.list <- c()

for ( i in 1:nsim) {
  beta.0 <- beta.3c.0[ i, ]
  beta.1 <- beta.3c.1[ i, ]  
  qoi.list <- rbind(qoi.list, 
                    as.data.frame(
                      calculate.qoi.full(fa.data.nna, 
                                         group.var = "colony", fit.0 = spec1.0, fit.1 = spec1.1, 
                                         beta.0 = beta.0, beta.1 = beta.1)))
  setTxtProgressBar(pb,i/nsim)
}

# Plot QOIs ---------------------------------------------------------------

# E(Y)
obs.diff <- qoi.list $ ey.x1.b0 - qoi.list $ ey.x0.b0
tot.diff <- qoi.list $ ey.x1.b1 - qoi.list $ ey.x0.b0
ey.obs.pct <- 100 * obs.diff/tot.diff

# Pr(Y>0)
obs.diff <- qoi.list $ pry.x1.b0 - qoi.list $ pry.x0.b0
tot.diff <- qoi.list $ pry.x1.b1 - qoi.list $ pry.x0.b0
pry.obs.pct <- 100 * obs.diff/tot.diff

# E (y | y > 0)
obs.diff <- qoi.list $ cy.x1.b0 - qoi.list $ cy.x0.b0
tot.diff <- qoi.list $ cy.x1.b1 - qoi.list $ cy.x0.b0
cy.obs.pct <- 100 * obs.diff/tot.diff

quantile(ey.obs.pct, probs = c(.05, .5, .95))
quantile(pry.obs.pct, probs = c(.05, .5, .95))
quantile(cy.obs.pct, probs = c(.05, .5, .95))


pdf(file="output/figures/FigureA7_ColdWar.pdf", width=5.5, height=4)
par(mar=c(2,6,2,3))
plot(0,0, type="n", xlim = c(0, 100), ylim = c(0.75,9.5), axes=F,ann=F)
### shades
shadeColor <- c("gray80", "gray92")
## E(Y )
polygon(y = c(7,7,9,9), x = c(0, 
                              quantile(ey.obs.pct, probs = c(.5)), 
                              quantile(ey.obs.pct, probs = c(.5)), 
                              0), col= shadeColor[1], border=F)
polygon(y = c(7,7,9,9), x = c(quantile(ey.obs.pct, probs = c(.5)), 
                              100, 100, 
                              quantile(ey.obs.pct, probs = c(.5))), 
        col= shadeColor[2], border=F)
lines(x = quantile(ey.obs.pct, probs = c(.05, .95)), y = c(8,8))
lines(x = quantile(ey.obs.pct, probs = c(.05, .05)), y = c(7.75, 8.25))
lines(x = quantile(ey.obs.pct, probs = c(.95, .95)), y = c(7.75, 8.25))
points(x = quantile(ey.obs.pct, probs = .5), y = 8, pch = 19)
## Pr(Y>0)
polygon(y = c(4,4,6,6), x = c(0, quantile(pry.obs.pct, probs = .5), quantile(pry.obs.pct, probs = .5)
                              , 0), col= shadeColor[1], border=F)
polygon(y = c(4,4,6,6), x = c(quantile(pry.obs.pct, probs = .5), 
                              100, 100, quantile(pry.obs.pct, probs = .5)), col= shadeColor[2], border=F)
lines(x = quantile(pry.obs.pct, probs = c(.05, .95)), y = c(5,5))
lines(x = quantile(pry.obs.pct, probs = c(.05, .05)), y = c(4.75, 5.25))
lines(x = quantile(pry.obs.pct, probs = c(.95, .95)), y = c(4.75, 5.25))
points(x = quantile(pry.obs.pct, probs = .5), y = 5, pch = 19)
## E(Y | Y>0)
polygon(y = c(1,1,3,3), x = c(0, quantile(cy.obs.pct, probs = .5),
                              quantile(cy.obs.pct, probs = .5), 0), col= shadeColor[1], border=F)
polygon(y = c(1,1,3,3), x = c(quantile(cy.obs.pct, probs = .5), 
                              100, 100, quantile(cy.obs.pct, probs = .5)), col= shadeColor[2], border=F)
lines(x = quantile(cy.obs.pct, probs = c(.05, .95)), y = c(2,2))
lines(x = quantile(cy.obs.pct, probs = c(.05, .05)), y = c(1.75, 2.25))
lines(x = quantile(cy.obs.pct, probs = c(.95, .95)), y = c(1.75, 2.25))
points(x = quantile(cy.obs.pct, probs = .5), y = 2, pch = 19)

xlabd <- c("0 %", "25 %","50 %","75 %","100 %")
axis(1, cex.axis=1, mgp=c(3,.4,0), labels=xlabd,at=c(0, 25, 50, 75, 100))
axis(2, labels = c("E(Y | Y > 0)", "Pr(Y > 0)", "E(Y)"), at=c(2, 5, 8), las = 1, tick = F)
text(9.5, 9.5, "Observables")
text(50, 9.5, "Saliency")
dev.off()



# Post Cold War period ----------------------------------------------------

## Listwise delete missing values
fa.data.nna <- fa.data.cw0[, c("tobitDV", "RA", "RB", "WBdum2", "WBdum3", "WBdum4","WBdum5", "WB",
                           "colony", "ccode", "rCCODE",
                           "lnDIS", "ColdWar", "lnPOPB", "dyad", "year", "rCNAME","dCNAME")]
fa.data.nna <- na.omit(fa.data.nna)
fa.data.nna $ RB2 <- fa.data.nna $ RB^2

## time variables
fa.data.nna $ time <- (fa.data.nna $ year - mean(fa.data.nna $ year))/sd(fa.data.nna $ year)
fa.data.nna $ time2 <- (fa.data.nna $ time)^2
fa.data.nna $ time3 <- (fa.data.nna $ time)^3

## donor
length(unique(fa.data.nna $ dCNAME))
fa.data.nna $ donor <- as.numeric(as.factor(fa.data.nna $ dCNAME))

## recipient
length(unique(fa.data.nna $ rCNAME))
fa.data.nna $ recip <- as.numeric(as.factor(fa.data.nna $ rCNAME))

## Censoring
fa.data.nna $ YLow <- ifelse(fa.data.nna $ tobitDV == 0, -Inf, fa.data.nna $ tobitDV)

fa.data.nna.1 <- fa.data.nna[fa.data.nna $ colony == 1, ]
fa.data.nna.0 <- fa.data.nna[fa.data.nna $ colony == 0, ]

nrow(fa.data.nna)
nrow(fa.data.nna.1)
nrow(fa.data.nna.0)

mean(fa.data.nna.1 $ tobitDV)
mean(fa.data.nna.0 $ tobitDV)



# Tobit models ------------------------------------------------------------

## Pooled

set.seed(123456789)
spec1.p <- MCMCglmm(
  fixed = cbind(YLow, tobitDV) ~ RA + RB + RB2 + WBdum2 + WBdum3 + WBdum4 + WBdum5 + 
    lnDIS + lnPOPB + time + time2 + time3 + 
    colony,
  random = ~us(1):dCNAME + us(1):rCNAME, 
  data = fa.data.nna,
  nitt = 2000,
  burnin = 1000,
  prior=list(R = list(V=1, nu=.001), G = list(G1=list(V=1, nu=.001), G2 = list(V=1, nu=.001))),
  family="cengaussian",
  thin = 10)

summary(spec1.p)


## Colony == 1

set.seed(123456789)
spec1.1 <- MCMCglmm(
  fixed = cbind(YLow, tobitDV) ~ RA + RB + RB2 + WBdum2 + WBdum3 + WBdum4 + WBdum5 + 
    lnDIS + lnPOPB + time + time2 + time3,
  random = ~us(1):dCNAME + us(1):rCNAME, 
  data = fa.data.nna.1,
  nitt = 11000,
  burnin = 1000,
  prior=list(R = list(V=1, nu=.001), G = list(G1=list(V=1, nu=.001), G2 = list(V=1, nu=.001))),
  family="cengaussian",
  thin = 10)

## Colony == 0

set.seed(123456789)
spec1.0 <- MCMCglmm(
   fixed = cbind(YLow, tobitDV) ~ RA + RB + RB2 + WBdum2 + WBdum3 + WBdum4 + WBdum5 + 
     lnDIS + lnPOPB + time + time2 + time3,
   random = ~us(1):dCNAME + us(1):rCNAME, 
   data = fa.data.nna.0,
   nitt = 11000,
   burnin = 1000,
   prior=list(R = list(V=1, nu=.001), G = list(G1=list(V=1, nu=.001), G2 = list(V=1, nu=.001))),
   family="cengaussian",
   thin = 10)

summary(spec1.p)
summary(spec1.1)
summary(spec1.0)

# Decomposition analyses --------------------------------------------------

# Calculate aggregate observables

(out <- calculate.qoi(fa.data.nna, group.var = "colony", fit.0 = spec1.0, fit.1 = spec1.1))

## decomposition based on E(Y)
(total.diff.3 <- out $ ey.x1.b1 - out $ ey.x0.b0)
(obs.contrib.3 <- out $ ey.x1.b0 - out $ ey.x0.b0)
obs.contrib.3 / total.diff.3

## decomposition based on pr(y>0)
(total.diff.1 <- out $ pry.x1.b1 - out $ pry.x0.b0)
(obs.contrib.1 <- out $ pry.x1.b0 - out $ pry.x0.b0)
obs.contrib.1 / total.diff.1

## decomposition based on E(Y | Y > 0)
(total.diff.2 <- out $ cy.x1.b1 - out $ cy.x0.b0)
(obs.contrib.2 <- out $ cy.x1.b0 - out $ cy.x0.b0)
obs.contrib.2 / total.diff.2



# Posterior distribution of these QoIs ------------------------------------
nsim <- nrow(spec1.0[[1]])
pb <- txtProgressBar()

# combine results from 3 chains
beta.3c.1 <- spec1.1[[1]]
beta.3c.0 <- spec1.0[[1]]
qoi.list <- c()

for ( i in 1:nsim) {
  beta.0 <- beta.3c.0[ i, ]
  beta.1 <- beta.3c.1[ i, ]  
  qoi.list <- rbind(qoi.list, 
                    as.data.frame(
                      calculate.qoi.full(fa.data.nna, 
                                         group.var = "colony", fit.0 = spec1.0, fit.1 = spec1.1, 
                                         beta.0 = beta.0, beta.1 = beta.1)))
  setTxtProgressBar(pb,i/nsim)
}

# Plot QOIs ---------------------------------------------------------------

# E(Y)
obs.diff <- qoi.list $ ey.x1.b0 - qoi.list $ ey.x0.b0
tot.diff <- qoi.list $ ey.x1.b1 - qoi.list $ ey.x0.b0
ey.obs.pct <- 100 * obs.diff/tot.diff

# Pr(Y>0)
obs.diff <- qoi.list $ pry.x1.b0 - qoi.list $ pry.x0.b0
tot.diff <- qoi.list $ pry.x1.b1 - qoi.list $ pry.x0.b0
pry.obs.pct <- 100 * obs.diff/tot.diff

# E (y | y > 0)
obs.diff <- qoi.list $ cy.x1.b0 - qoi.list $ cy.x0.b0
tot.diff <- qoi.list $ cy.x1.b1 - qoi.list $ cy.x0.b0
cy.obs.pct <- 100 * obs.diff/tot.diff

quantile(ey.obs.pct, probs = c(.05, .5, .95))
quantile(pry.obs.pct, probs = c(.05, .5, .95))
quantile(cy.obs.pct, probs = c(.05, .5, .95))


pdf(file="output/figures/FigureA8_PostColdWar.pdf", width=6, height=4.5)
par(mar=c(2,6,2,3))
plot(0,0, type="n", xlim = c(-50, 100), ylim = c(0.75,9.5), axes=F,ann=F)
### shades
shadeColor <- c("gray80", "gray92")
## E(Y )
polygon(y = c(7,7,9,9), x = c(-50, 
                              quantile(ey.obs.pct, probs = c(.5)), 
                              quantile(ey.obs.pct, probs = c(.5)), 
                              -50), col= "white", border=F)
polygon(y = c(7,7,9,9), x = c(quantile(ey.obs.pct, probs = c(.5)), 
                              100, 100, 
                              quantile(ey.obs.pct, probs = c(.5))), 
        col= shadeColor[2], border=F)
lines(x = quantile(ey.obs.pct, probs = c(.05, .95)), y = c(8,8))
lines(x = quantile(ey.obs.pct, probs = c(.05, .05)), y = c(7.75, 8.25))
lines(x = quantile(ey.obs.pct, probs = c(.95, .95)), y = c(7.75, 8.25))
points(x = quantile(ey.obs.pct, probs = .5), y = 8, pch = 19)
## Pr(Y>0)
polygon(y = c(4,4,6,6), x = c(-50, quantile(pry.obs.pct, probs = .5), quantile(pry.obs.pct, probs = .5)
                              , -50), col= "white", border=F)
polygon(y = c(4,4,6,6), x = c(quantile(pry.obs.pct, probs = .5), 
                              100, 100, quantile(pry.obs.pct, probs = .5)), col= shadeColor[2], border=F)
lines(x = quantile(pry.obs.pct, probs = c(.05, .95)), y = c(5,5))
lines(x = quantile(pry.obs.pct, probs = c(.05, .05)), y = c(4.75, 5.25))
lines(x = quantile(pry.obs.pct, probs = c(.95, .95)), y = c(4.75, 5.25))
points(x = quantile(pry.obs.pct, probs = .5), y = 5, pch = 19)
## E(Y | Y>0)
polygon(y = c(1,1,3,3), x = c(-50, quantile(cy.obs.pct, probs = .5),
                              quantile(cy.obs.pct, probs = .5), -50), col= "white", border=F)
polygon(y = c(1,1,3,3), x = c(quantile(cy.obs.pct, probs = .5), 
                              100, 100, quantile(cy.obs.pct, probs = .5)), col= shadeColor[2], border=F)
lines(x = quantile(cy.obs.pct, probs = c(.05, .95)), y = c(2,2))
lines(x = quantile(cy.obs.pct, probs = c(.05, .05)), y = c(1.75, 2.25))
lines(x = quantile(cy.obs.pct, probs = c(.95, .95)), y = c(1.75, 2.25))
points(x = quantile(cy.obs.pct, probs = .5), y = 2, pch = 19)

xlabd <- c("-50 %", "-25 %","0 %","25 %","50 %", "75%", "100%")
axis(1, cex.axis=1, mgp=c(3,.4,0), labels=xlabd,at=c(-50, -25, 0, 25, 50, 75, 100))
axis(2, labels = c("E(Y | Y > 0)", "Pr(Y > 0)", "E(Y)"), at=c(2, 5, 8), las = 1, tick = F)
text(-35, 9.5, "Observables")
text(30, 9.5, "Saliency")
dev.off()


# End of file
